package pri.lt.parser;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.By;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebElement;
import pri.lt.Constants;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * {@link Parser} implementation for article pages whose URL contains {@code sina.com.cn}.
 *
 * <p>Pages are loaded through a {@link WebDriverWrapper} obtained from
 * {@link WebDriverManager}; the resulting HTML is then inspected with Jsoup. Every wrapper
 * obtained by this class is released exactly once, as soon as the HTML has been read.
 *
 * <p>A single shared instance is created lazily by {@link #getInstance()}.
 */
public class SinaParser implements Parser  {

    /** Substring a URL must contain to be handled by this parser. */
    private static final String SINA_DOMAIN = "sina.com.cn";

    /** Only the first this-many hot-comment candidates on the page are examined. */
    private static final int MAX_HOT_COMMENT_CANDIDATES = 10;

    /** Above this comment count an article is always flagged as hot. */
    private static final int ALWAYS_HOT_COMMENT_COUNT = 50;

    /** At or below this comment count an article is never flagged as hot. */
    private static final int MIN_HOT_COMMENT_COUNT = 10;

    /** Comments required per started hour since publication for the rate-based hot check. */
    private static final int HOT_COMMENTS_PER_HOUR = 10;

    /** Minimum pixel width of an image accepted as thumbnail. */
    private static final int MIN_THUMBNAIL_WIDTH = 500;

    /** Timeouts for thumbnail downloads so one stalled image host cannot block the crawl. */
    private static final int IMAGE_CONNECT_TIMEOUT_MS = 10_000;
    private static final int IMAGE_READ_TIMEOUT_MS = 10_000;

    private static final long MILLIS_PER_HOUR = 1000L * 60 * 60;

    /**
     * Matches date text such as {@code 2018年05月04日 12:30} (seconds optional).
     * Groups: 1 = year, 2 = month, 3 = day, 4 = HH:mm, 5 = ":ss" when present.
     */
    private static final Pattern CHINESE_DATE_TIME = Pattern.compile(
            "(\\d{4})[\\s\\u00A0]*年[\\s\\u00A0]*(\\d{1,2})[\\s\\u00A0]*月[\\s\\u00A0]*(\\d{1,2})"
                    + "[\\s\\u00A0]*日[\\s\\u00A0]*(\\d{1,2}:\\d{2})(:\\d{2})?");

    static SinaParser sInstance;

    /**
     * Returns the shared parser instance, creating it on first use.
     *
     * @return the singleton {@code SinaParser}
     */
    public synchronized static Parser getInstance() {
        if (sInstance == null) {
            sInstance = new SinaParser();
        }
        return  sInstance;
    }

    /**
     * Loads {@code url} and extracts title, content HTML, publish time, comment link and a
     * thumbnail candidate from it.
     *
     * @param url address of the article page
     * @return the populated result, or {@code null} when the page HTML could not be obtained,
     *         no publish time or content container was found, the result is rejected by
     *         {@code ParserResult.isValid()}, or an unexpected exception occurred
     */
    @Override
    public ParserResult parse(String url) {
        String pageXml = fetchRenderedHtml(url);
        if (pageXml == null) {
            return null;
        }

        try {
            Document doc = Jsoup.parse(pageXml, url);

            String publishTime = extractPublishTime(doc);
            if (publishTime == null) {
                return null;
            }
            System.out.println("litao url publish at " + publishTime);

            Element contentElement = findContentElement(doc);
            if (contentElement == null) {
                return null;
            }

            ParserResult parserResult = new ParserResult();
            parserResult.title = extractTitle(doc);
            parserResult.content = contentElement.html();
            try {
                parserResult.publishTime =
                        new SimpleDateFormat(Constants.DATETIME_FORMAT).parse(publishTime);
            } catch (ParseException | RuntimeException e) {
                // Leave publishTime unset; isValid() below decides whether that is acceptable.
                e.printStackTrace();
            }

            String commentUrl = extractCommentUrl(doc);
            if (commentUrl != null) {
                parserResult.commentUrl = commentUrl;
            } else {
                System.out.println("get comment link exception " + url);
            }

            String thumbnailUrl = findThumbnailUrl(contentElement);
            if (thumbnailUrl != null) {
                parserResult.thumbnailUrl = thumbnailUrl;
            }

            return parserResult.isValid() ? parserResult : null;
        } catch (Exception e) {
            // Boundary catch: a malformed page must not take down the crawl loop.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Loads the comment page referenced by {@code parserResult.commentUrl}, collects hot
     * comments and derives whether the article counts as "hot".
     *
     * <p>An article is hot when it has more than 50 comments, or more than 10 comments and
     * more than {@code (elapsedHours + 1) * 10} comments since its publish time. When the
     * publish time is unknown the rate-based check is skipped.
     *
     * @param url          article URL; used as Jsoup base URI and in log output
     * @param parserResult result of a previous {@link #parse(String)} call
     * @return the comment summary, or {@code null} when no comment URL is available, the
     *         page could not be loaded, or the expected comment markup is missing
     */
    @Override
    public HotCommentResult getHotCommentResult(String url, ParserResult parserResult) {
        if (parserResult == null || isBlank(parserResult.commentUrl)) {
            // Nothing to load; do not occupy a web driver for it.
            System.out.println("getHotCommentResult get Exception url = " + url);
            return null;
        }

        String pagehtml;
        WebDriverWrapper webDriverWrapper = WebDriverManager.getWebDriverWrapper();
        try {
            webDriverWrapper.getWebDriver().get(parserResult.commentUrl);
            // Serialize the fully loaded page into a markup string.
            pagehtml = webDriverWrapper.getHtmlExcuteJs();
        } catch (Exception e) {
            System.out.println("getHotCommentResult get Exception url = " + url);
            e.printStackTrace();
            return null;
        } finally {
            webDriverWrapper.release();
        }
        if (pagehtml == null) {
            return null;
        }

        String commentCountString;
        List<HotComment> comments = new ArrayList<>();
        try {
            Document doc = Jsoup.parse(pagehtml, url);

            Elements counters =
                    doc.select("div.sina-comment-form.sina-comment-form-top").select("em");
            if (counters.size() < 2) {
                return null;
            }
            commentCountString = counters.get(1).text();

            Element hotList = doc.select("div.sina-comment-wrap").select("div.hot-wrap")
                    .select("div.list").first();
            if (hotList == null) {
                return null;
            }
            Elements commentElements =
                    hotList.getElementsByClass("item clearfix").select("div.txt");

            int limit = Math.min(commentElements.size(), MAX_HOT_COMMENT_CANDIDATES);
            for (int i = 0; i < limit; i++) {
                Element candidate = commentElements.get(i);
                // Entries containing a <span> are treated as replies to another user; skip them.
                if (!candidate.select("span").isEmpty()) {
                    continue;
                }
                HotComment hotComment = new HotComment();
                hotComment.content = candidate.text();
                comments.add(hotComment);
            }
        } catch (RuntimeException e) {
            return null;
        }

        int commentCount = 0;
        try {
            commentCount = Integer.parseInt(commentCountString.trim());
        } catch (NumberFormatException ignored) {
            // Non-numeric counter text is treated as zero comments.
        }

        boolean isHot = false;
        if (commentCount > ALWAYS_HOT_COMMENT_COUNT) {
            isHot = true;
        } else if (commentCount > MIN_HOT_COMMENT_COUNT && parserResult.publishTime != null) {
            long elapsedMillis = new Date().getTime() - parserResult.publishTime.getTime();
            long elapsedHours = elapsedMillis / MILLIS_PER_HOUR;
            isHot = commentCount > (elapsedHours + 1) * HOT_COMMENTS_PER_HOUR;
        }

        HotCommentResult hotCommentResult = new HotCommentResult();
        hotCommentResult.isHot = isHot;
        hotCommentResult.commentCount = commentCount;
        hotCommentResult.crawlerReplyRankCount =
                (double) commentCount / (double) ALWAYS_HOT_COMMENT_COUNT;
        hotCommentResult.comments = comments;
        return hotCommentResult;
    }

    /**
     * @return {@code true} when {@code url} is non-null and contains {@code sina.com.cn}
     */
    @Override
    public boolean isDomainSupport(String url) {
        return url != null && url.contains(SINA_DOMAIN);
    }

    /**
     * @return {@code true} when {@code url} contains {@code sina.com.cn} and ends with
     *         {@code shtml}
     */
    @Override
    public boolean isUrlCanParse(String url) {
        return url != null && url.contains(SINA_DOMAIN) && url.endsWith("shtml");
    }

    /**
     * @return {@code true} when {@code url} contains {@code sina.com.cn} and ends with
     *         {@code /} or with the domain itself
     */
    @Override
    public boolean isDomainDirectory(String url) {
        return url != null && url.contains(SINA_DOMAIN) &&
                (url.endsWith("/") || url.endsWith(SINA_DOMAIN));
    }

    /**
     * Loads {@code url} in a web driver and returns the page markup, releasing the driver
     * exactly once before returning.
     *
     * @return the page markup, or {@code null} when it could not be read
     */
    private static String fetchRenderedHtml(String url) {
        WebDriverWrapper webDriverWrapper = WebDriverManager.getWebDriverWrapper();
        try {
            try {
                webDriverWrapper.getWebDriver().get(url);
            } catch (Exception e) {
                // Best effort: a failed or timed-out navigation may still leave readable
                // markup behind, so continue and try to read it.
                System.out.println("page load exception " + url);
            }
            // Serialize the fully loaded page into a markup string.
            return webDriverWrapper.getHtmlExcuteJs();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            webDriverWrapper.release();
        }
    }

    /**
     * Extracts the publish time as {@code yyyy-MM-dd HH:mm:ss} text.
     *
     * <p>Two page layouts are tried: {@code div.date-source span.date} holding a
     * {@code yyyy年MM月dd日 HH:mm} style date, then the first {@code span} inside
     * {@code p.source-time}, whose text is used as-is with {@code :00} appended.
     *
     * @return the time text, or {@code null} when neither layout yields one
     */
    private static String extractPublishTime(Document doc) {
        String dateText = doc.select("div.date-source").select("span.date").text();
        Matcher matcher = CHINESE_DATE_TIME.matcher(dateText);
        if (matcher.find()) {
            String seconds = matcher.group(5) != null ? matcher.group(5) : ":00";
            return matcher.group(1) + "-" + padTwoDigits(matcher.group(2)) + "-"
                    + padTwoDigits(matcher.group(3)) + " " + matcher.group(4) + seconds;
        }

        Element timeSpan = doc.select("p.source-time").select("span").first();
        if (timeSpan != null) {
            String timeText = timeSpan.text().trim();
            if (!timeText.isEmpty()) {
                return timeText + ":00";
            }
        }
        return null;
    }

    /** Left-pads a one-character number with {@code 0}. */
    private static String padTwoDigits(String number) {
        return number.length() == 1 ? "0" + number : number;
    }

    /** Returns the article body container, trying the known layouts in order. */
    private static Element findContentElement(Document doc) {
        Element content = doc.getElementById("article");
        if (content == null) {
            content = doc.select("div.article-body.main-body").first();
        }
        if (content == null) {
            content = doc.select("div.article").first();
        }
        return content;
    }

    /**
     * Returns the headline text from {@code h1.main-title}, falling back to the first
     * {@code h1} under {@code div.article-header}; {@code null} when neither exists.
     */
    private static String extractTitle(Document doc) {
        String title = textOfFirst(doc.select("h1.main-title"));
        if (isBlank(title)) {
            String fallback = textOfFirst(doc.select("div.article-header").select("h1"));
            if (fallback != null) {
                title = fallback;
            }
        }
        return title;
    }

    /**
     * Returns the {@code href} of the first link in the comment form, falling back to the
     * first link of the article body; {@code null} when neither container has a link.
     */
    private static String extractCommentUrl(Document doc) {
        String href = firstAnchorHref(doc.select("div.sina-comment-form.sina-comment-top").first());
        if (href == null) {
            href = firstAnchorHref(doc.select("div.article-body.main-body").first());
        }
        return href;
    }

    /** Returns the {@code href} of the first {@code a} under {@code container}, or {@code null}. */
    private static String firstAnchorHref(Element container) {
        if (container == null) {
            return null;
        }
        Element anchor = container.select("a").first();
        return anchor == null ? null : anchor.attr("href");
    }

    /** Returns the text of the first element in {@code elements}, or {@code null} if empty. */
    private static String textOfFirst(Elements elements) {
        Element first = elements.first();
        return first == null ? null : first.text();
    }

    private static boolean isBlank(String value) {
        return value == null || value.trim().length() == 0;
    }

    /**
     * Returns the URL of the first image inside {@code contentElement} that can be downloaded
     * and has thumbnail-compatible dimensions, or {@code null} when there is none.
     */
    private static String findThumbnailUrl(Element contentElement) {
        // All <img> elements that carry a src attribute.
        for (Element img : contentElement.select("img[src]")) {
            // Resolve against the document base URI so protocol-relative and path-relative
            // sources both become fetchable absolute URLs.
            String imgUrl = img.absUrl("src");
            if (!imgUrl.startsWith("http")) {
                continue; // unresolvable or non-HTTP source (e.g. data: URI)
            }
            try {
                if (hasThumbnailDimensions(imgUrl)) {
                    return imgUrl;
                }
            } catch (IOException | RuntimeException e) {
                // One broken image must not prevent later candidates from being tried.
                e.printStackTrace();
            }
        }
        return null;
    }

    /**
     * Downloads the image and checks that it is at least 500 px wide with a height/width
     * ratio in {@code (0.25, 2]}.
     *
     * @throws IOException when the image cannot be downloaded or decoded
     */
    private static boolean hasThumbnailDimensions(String imgUrl) throws IOException {
        URLConnection connection = new URL(imgUrl).openConnection();
        connection.setConnectTimeout(IMAGE_CONNECT_TIMEOUT_MS);
        connection.setReadTimeout(IMAGE_READ_TIMEOUT_MS);

        // ImageIO.read does not close the stream it is given.
        try (InputStream in = connection.getInputStream()) {
            BufferedImage image = ImageIO.read(in);
            if (image == null) {
                return false; // no registered reader understands this format
            }
            int srcWidth = image.getWidth();
            int srcHeight = image.getHeight();
            if (srcWidth < MIN_THUMBNAIL_WIDTH || srcHeight <= 0) {
                return false;
            }
            float ratio = ((float) srcHeight) / ((float) srcWidth);
            return ratio > 0.25 && ratio <= 2;
        }
    }

}
